{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 玖零猴的Demo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compare_site_names(one,two):\n",
    "    df1 = pd.read_csv(one, header=None)\n",
    "    df2 = pd.read_csv(two, header=None)\n",
    "    data1 = df1.to_dict(orient='list')[0]\n",
    "    data2 = df2.to_dict(orient='list')[0]\n",
    "    diff1 = set(data1).difference(data2)\n",
    "    diff2 = set(data2).difference(data1)\n",
    "    return list(diff2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "hgg_diff = compare_site_names(\"18hgg.csv\",\"19hgg.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "49"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(hgg_diff)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "lgg_diff = compare_site_names(\"18lgg.csv\",\"19lgg.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(lgg_diff)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "flair_name = \"_flair.nii.gz\"\n",
    "t1_name = \"_t1.nii.gz\"\n",
    "t1ce_name = \"_t1ce.nii.gz\"\n",
    "t2_name = \"_t2.nii.gz\"\n",
    "mask_name = \"_seg.nii.gz\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "bratshgg_path = r\"D:\\Project\\CollegeDesign\\dataset\\BraTs2019\\MICCAI_BraTS_2019_Data_Training\\MICCAI_BraTS_2019_Data_Training\\HGG\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "bratslgg_path = r\"D:\\Project\\CollegeDesign\\dataset\\BraTs2019\\MICCAI_BraTS_2019_Data_Training\\MICCAI_BraTS_2019_Data_Training\\LGG\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputImg_path = r\".\\testImage\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputMask_path = r\".\\testMask\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.path.exists(outputImg_path):\n",
    "    os.mkdir(outputImg_path)\n",
    "if not os.path.exists(outputMask_path):\n",
    "    os.mkdir(outputMask_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "pathhgg_list = []\n",
    "pathlgg_list = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "for idx in range(len(hgg_diff)):\n",
    "    mystr = \"BraTS19\" + hgg_diff[idx]\n",
    "    pathhgg_list.append(mystr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['BraTS19_CBICA_AWX_1',\n",
       " 'BraTS19_TMC_06290_1',\n",
       " 'BraTS19_TMC_15477_1',\n",
       " 'BraTS19_CBICA_BBG_1',\n",
       " 'BraTS19_TMC_30014_1',\n",
       " 'BraTS19_CBICA_AVB_1',\n",
       " 'BraTS19_TMC_06643_1',\n",
       " 'BraTS19_CBICA_AUX_1',\n",
       " 'BraTS19_CBICA_AVT_1',\n",
       " 'BraTS19_CBICA_BHQ_1',\n",
       " 'BraTS19_CBICA_BLJ_1',\n",
       " 'BraTS19_CBICA_BCF_1',\n",
       " 'BraTS19_CBICA_BCL_1',\n",
       " 'BraTS19_CBICA_BGG_1',\n",
       " 'BraTS19_CBICA_BGN_1',\n",
       " 'BraTS19_CBICA_AVF_1',\n",
       " 'BraTS19_CBICA_BGX_1',\n",
       " 'BraTS19_CBICA_BAX_1',\n",
       " 'BraTS19_CBICA_AYG_1',\n",
       " 'BraTS19_CBICA_BHV_1',\n",
       " 'BraTS19_CBICA_APK_1',\n",
       " 'BraTS19_TMC_21360_1',\n",
       " 'BraTS19_CBICA_BDK_1',\n",
       " 'BraTS19_TMC_27374_1',\n",
       " 'BraTS19_TMC_11964_1',\n",
       " 'BraTS19_CBICA_ASR_1',\n",
       " 'BraTS19_CBICA_AUA_1',\n",
       " 'BraTS19_CBICA_BGT_1',\n",
       " 'BraTS19_CBICA_BEM_1',\n",
       " 'BraTS19_CBICA_AOS_1',\n",
       " 'BraTS19_CBICA_BAP_1',\n",
       " 'BraTS19_CBICA_AOC_1',\n",
       " 'BraTS19_CBICA_BNR_1',\n",
       " 'BraTS19_CBICA_BGE_1',\n",
       " 'BraTS19_CBICA_AYC_1',\n",
       " 'BraTS19_CBICA_ATN_1',\n",
       " 'BraTS19_CBICA_BGO_1',\n",
       " 'BraTS19_CBICA_BHZ_1',\n",
       " 'BraTS19_CBICA_BKV_1',\n",
       " 'BraTS19_CBICA_AUW_1',\n",
       " 'BraTS19_CBICA_BIC_1',\n",
       " 'BraTS19_CBICA_ANV_1',\n",
       " 'BraTS19_CBICA_AWV_1',\n",
       " 'BraTS19_TMC_12866_1',\n",
       " 'BraTS19_CBICA_BGW_1',\n",
       " 'BraTS19_CBICA_BGR_1',\n",
       " 'BraTS19_CBICA_BJY_1',\n",
       " 'BraTS19_CBICA_BAN_1',\n",
       " 'BraTS19_CBICA_ASF_1']"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pathhgg_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "for idx in range(len(lgg_diff)):\n",
    "    mystr = \"BraTS19\" + lgg_diff[idx]\n",
    "    pathlgg_list.append(mystr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['BraTS19_TMC_09043_1']"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pathlgg_list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 以上打印出来的就是BraTS19训练集中新增的那些病例，我们把它们作为测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import SimpleITK as sitk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalize(slice, bottom=99, down=1):\n",
    "    \"\"\"\n",
    "    normalize image with mean and std for regionnonzero,and clip the value into range\n",
    "    :param slice:\n",
    "    :param bottom:\n",
    "    :param down:\n",
    "    :return:\n",
    "    \"\"\"\n",
    "    #有点像“去掉最低分去掉最高分”的意思,使得数据集更加“公平”\n",
    "    b = np.percentile(slice, bottom)\n",
    "    t = np.percentile(slice, down)\n",
    "    slice = np.clip(slice, t, b)#限定范围numpy.clip(a, a_min, a_max, out=None)\n",
    "\n",
    "    #除了黑色背景外的区域要进行标准化\n",
    "    image_nonzero = slice[np.nonzero(slice)]\n",
    "    if np.std(slice) == 0 or np.std(image_nonzero) == 0:\n",
    "        return slice\n",
    "    else:\n",
    "        tmp = (slice - np.mean(image_nonzero)) / np.std(image_nonzero)\n",
    "        # since the range of intensities is between 0 and 5000 ,\n",
    "        # the min in the normalized slice corresponds to 0 intensity in unnormalized slice\n",
    "        # the min is replaced with -9 just to keep track of 0 intensities\n",
    "        # so that we can discard those intensities afterwards when sampling random patches\n",
    "        tmp[tmp == tmp.min()] = -9 #黑色背景区域\n",
    "        return tmp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def crop_ceter(img,croph,cropw):   \n",
    "    #for n_slice in range(img.shape[0]):\n",
    "    height,width = img[0].shape \n",
    "    starth = height//2-(croph//2)\n",
    "    startw = width//2-(cropw//2)        \n",
    "    return img[:,starth:starth+croph,startw:startw+cropw]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BraTS19_CBICA_AWX_1\n",
      "BraTS19_TMC_06290_1\n",
      "BraTS19_TMC_15477_1\n",
      "BraTS19_CBICA_BBG_1\n",
      "BraTS19_TMC_30014_1\n",
      "BraTS19_CBICA_AVB_1\n",
      "BraTS19_TMC_06643_1\n",
      "BraTS19_CBICA_AUX_1\n",
      "BraTS19_CBICA_AVT_1\n",
      "BraTS19_CBICA_BHQ_1\n",
      "BraTS19_CBICA_BLJ_1\n",
      "BraTS19_CBICA_BCF_1\n",
      "BraTS19_CBICA_BCL_1\n",
      "BraTS19_CBICA_BGG_1\n",
      "BraTS19_CBICA_BGN_1\n",
      "BraTS19_CBICA_AVF_1\n",
      "BraTS19_CBICA_BGX_1\n",
      "BraTS19_CBICA_BAX_1\n",
      "BraTS19_CBICA_AYG_1\n",
      "BraTS19_CBICA_BHV_1\n",
      "BraTS19_CBICA_APK_1\n",
      "BraTS19_TMC_21360_1\n",
      "BraTS19_CBICA_BDK_1\n",
      "BraTS19_TMC_27374_1\n",
      "BraTS19_TMC_11964_1\n",
      "BraTS19_CBICA_ASR_1\n",
      "BraTS19_CBICA_AUA_1\n",
      "BraTS19_CBICA_BGT_1\n",
      "BraTS19_CBICA_BEM_1\n",
      "BraTS19_CBICA_AOS_1\n",
      "BraTS19_CBICA_BAP_1\n",
      "BraTS19_CBICA_AOC_1\n",
      "BraTS19_CBICA_BNR_1\n",
      "BraTS19_CBICA_BGE_1\n",
      "BraTS19_CBICA_AYC_1\n",
      "BraTS19_CBICA_ATN_1\n",
      "BraTS19_CBICA_BGO_1\n",
      "BraTS19_CBICA_BHZ_1\n",
      "BraTS19_CBICA_BKV_1\n",
      "BraTS19_CBICA_AUW_1\n",
      "BraTS19_CBICA_BIC_1\n",
      "BraTS19_CBICA_ANV_1\n",
      "BraTS19_CBICA_AWV_1\n",
      "BraTS19_TMC_12866_1\n",
      "BraTS19_CBICA_BGW_1\n",
      "BraTS19_CBICA_BGR_1\n",
      "BraTS19_CBICA_BJY_1\n",
      "BraTS19_CBICA_BAN_1\n",
      "BraTS19_CBICA_ASF_1\n",
      "Done！\n"
     ]
    }
   ],
   "source": [
    "for subsetindex in range(len(pathhgg_list)):\n",
    "    brats_subset_path = bratshgg_path + \"/\" + (pathhgg_list[subsetindex]) + \"/\"\n",
    "    #获取每个病例的四个模态及Mask的路径\n",
    "    flair_image = brats_subset_path + (pathhgg_list[subsetindex]) + flair_name\n",
    "    t1_image = brats_subset_path + (pathhgg_list[subsetindex]) + t1_name\n",
    "    t1ce_image = brats_subset_path + (pathhgg_list[subsetindex]) + t1ce_name\n",
    "    t2_image = brats_subset_path + (pathhgg_list[subsetindex]) + t2_name\n",
    "    mask_image = brats_subset_path + (pathhgg_list[subsetindex]) + mask_name\n",
    "    #获取每个病例的四个模态及Mask数据\n",
    "    flair_src = sitk.ReadImage(flair_image, sitk.sitkInt16)\n",
    "    t1_src = sitk.ReadImage(t1_image, sitk.sitkInt16)\n",
    "    t1ce_src = sitk.ReadImage(t1ce_image, sitk.sitkInt16)\n",
    "    t2_src = sitk.ReadImage(t2_image, sitk.sitkInt16)\n",
    "    mask = sitk.ReadImage(mask_image, sitk.sitkUInt8)\n",
    "    #GetArrayFromImage()可用于将SimpleITK对象转换为ndarray\n",
    "    flair_array = sitk.GetArrayFromImage(flair_src)\n",
    "    t1_array = sitk.GetArrayFromImage(t1_src)\n",
    "    t1ce_array = sitk.GetArrayFromImage(t1ce_src)\n",
    "    t2_array = sitk.GetArrayFromImage(t2_src)\n",
    "    mask_array = sitk.GetArrayFromImage(mask)\n",
    "    #对四个模态分别进行标准化,由于它们对比度不同\n",
    "    flair_array_nor = normalize(flair_array)\n",
    "    t1_array_nor = normalize(t1_array)\n",
    "    t1ce_array_nor = normalize(t1ce_array)\n",
    "    t2_array_nor = normalize(t2_array)\n",
    "    #裁剪(偶数才行)\n",
    "    flair_crop = crop_ceter(flair_array_nor,160,160)\n",
    "    t1_crop = crop_ceter(t1_array_nor,160,160)\n",
    "    t1ce_crop = crop_ceter(t1ce_array_nor,160,160)\n",
    "    t2_crop = crop_ceter(t2_array_nor,160,160)\n",
    "    mask_crop = crop_ceter(mask_array,160,160) \n",
    "    print((pathhgg_list[subsetindex]))\n",
    "    #切片处理,并去掉没有病灶的切片\n",
    "    for n_slice in range(flair_crop.shape[0]):\n",
    "        if np.max(mask_crop[n_slice,:,:]) != 0:\n",
    "            maskImg = mask_crop[n_slice,:,:]\n",
    "            \n",
    "            FourModelImageArray = np.zeros((flair_crop.shape[1],flair_crop.shape[2],4),np.float)\n",
    "            flairImg = flair_crop[n_slice,:,:]\n",
    "            flairImg = flairImg.astype(np.float)\n",
    "            FourModelImageArray[:,:,0] = flairImg\n",
    "            t1Img = t1_crop[n_slice,:,:]\n",
    "            t1Img = t1Img.astype(np.float)\n",
    "            FourModelImageArray[:,:,1] = t1Img\n",
    "            t1ceImg = t1ce_crop[n_slice,:,:]\n",
    "            t1ceImg = t1ceImg.astype(np.float)\n",
    "            FourModelImageArray[:,:,2] = t1ceImg\n",
    "            t2Img = t2_crop[n_slice,:,:]\n",
    "            t2Img = t2Img.astype(np.float)\n",
    "            FourModelImageArray[:,:,3] = t2Img       \n",
    "        \n",
    "            imagepath = outputImg_path + \"\\\\\" + (pathhgg_list[subsetindex]) + \"_\" + str(n_slice) + \".npy\"\n",
    "            maskpath = outputMask_path + \"\\\\\" + (pathhgg_list[subsetindex]) + \"_\" + str(n_slice) + \".npy\"\n",
    "            np.save(imagepath,FourModelImageArray)#(160,160,4) np.float dtype('float64')\n",
    "            np.save(maskpath,maskImg)# (160, 160) dtype('uint8') 值为0 1 2 4\n",
    "print(\"Done！\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BraTS19_TMC_09043_1\n",
      "Done！\n"
     ]
    }
   ],
   "source": [
    "for subsetindex in range(len(pathlgg_list)):\n",
    "    brats_subset_path = bratslgg_path + \"/\" + (pathlgg_list[subsetindex]) + \"/\"\n",
    "    #获取每个病例的四个模态及Mask的路径\n",
    "    flair_image = brats_subset_path + (pathlgg_list[subsetindex]) + flair_name\n",
    "    t1_image = brats_subset_path + (pathlgg_list[subsetindex]) + t1_name\n",
    "    t1ce_image = brats_subset_path + (pathlgg_list[subsetindex]) + t1ce_name\n",
    "    t2_image = brats_subset_path + (pathlgg_list[subsetindex]) + t2_name\n",
    "    mask_image = brats_subset_path + (pathlgg_list[subsetindex]) + mask_name\n",
    "    #获取每个病例的四个模态及Mask数据\n",
    "    flair_src = sitk.ReadImage(flair_image, sitk.sitkInt16)\n",
    "    t1_src = sitk.ReadImage(t1_image, sitk.sitkInt16)\n",
    "    t1ce_src = sitk.ReadImage(t1ce_image, sitk.sitkInt16)\n",
    "    t2_src = sitk.ReadImage(t2_image, sitk.sitkInt16)\n",
    "    mask = sitk.ReadImage(mask_image, sitk.sitkUInt8)\n",
    "    #GetArrayFromImage()可用于将SimpleITK对象转换为ndarray\n",
    "    flair_array = sitk.GetArrayFromImage(flair_src)\n",
    "    t1_array = sitk.GetArrayFromImage(t1_src)\n",
    "    t1ce_array = sitk.GetArrayFromImage(t1ce_src)\n",
    "    t2_array = sitk.GetArrayFromImage(t2_src)\n",
    "    mask_array = sitk.GetArrayFromImage(mask)\n",
    "    #对四个模态分别进行标准化,由于它们对比度不同\n",
    "    flair_array_nor = normalize(flair_array)\n",
    "    t1_array_nor = normalize(t1_array)\n",
    "    t1ce_array_nor = normalize(t1ce_array)\n",
    "    t2_array_nor = normalize(t2_array)\n",
    "    #裁剪(偶数才行)\n",
    "    flair_crop = crop_ceter(flair_array_nor,160,160)\n",
    "    t1_crop = crop_ceter(t1_array_nor,160,160)\n",
    "    t1ce_crop = crop_ceter(t1ce_array_nor,160,160)\n",
    "    t2_crop = crop_ceter(t2_array_nor,160,160)\n",
    "    mask_crop = crop_ceter(mask_array,160,160) \n",
    "    print((pathlgg_list[subsetindex]))\n",
    "    #切片处理,并去掉没有病灶的切片\n",
    "    for n_slice in range(flair_crop.shape[0]):\n",
    "        if np.max(mask_crop[n_slice,:,:]) != 0:\n",
    "            maskImg = mask_crop[n_slice,:,:]\n",
    "            \n",
    "            FourModelImageArray = np.zeros((flair_crop.shape[1],flair_crop.shape[2],4),np.float)\n",
    "            flairImg = flair_crop[n_slice,:,:]\n",
    "            flairImg = flairImg.astype(np.float)\n",
    "            FourModelImageArray[:,:,0] = flairImg\n",
    "            t1Img = t1_crop[n_slice,:,:]\n",
    "            t1Img = t1Img.astype(np.float)\n",
    "            FourModelImageArray[:,:,1] = t1Img\n",
    "            t1ceImg = t1ce_crop[n_slice,:,:]\n",
    "            t1ceImg = t1ceImg.astype(np.float)\n",
    "            FourModelImageArray[:,:,2] = t1ceImg\n",
    "            t2Img = t2_crop[n_slice,:,:]\n",
    "            t2Img = t2Img.astype(np.float)\n",
    "            FourModelImageArray[:,:,3] = t2Img       \n",
    "        \n",
    "            imagepath = outputImg_path + \"\\\\\" + (pathlgg_list[subsetindex]) + \"_\" + str(n_slice) + \".npy\"\n",
    "            maskpath = outputMask_path + \"\\\\\" + (pathlgg_list[subsetindex]) + \"_\" + str(n_slice) + \".npy\"\n",
    "            np.save(imagepath,FourModelImageArray)#(160,160,4) np.float dtype('float64')\n",
    "            np.save(maskpath,maskImg)# (160, 160) dtype('uint8') 值为0 1 2 4\n",
    "print(\"Done！\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
